{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-07-15T01:54:50.714937900Z",
     "start_time": "2024-07-15T01:54:45.383351500Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import joblib\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, LogisticRegression, Lasso\n",
    "from sklearn.metrics import mean_squared_error, classification_report, roc_auc_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "获取特征值\n",
      "(20640, 8)\n",
      "--------------------------------------------------\n",
      "[   8.3252       41.            6.98412698    1.02380952  322.\n",
      "    2.55555556   37.88       -122.23      ]\n",
      "目标值\n",
      "[4.526 3.585 3.521 ... 0.923 0.847 0.894]\n",
      ".. _california_housing_dataset:\n",
      "\n",
      "California Housing dataset\n",
      "--------------------------\n",
      "\n",
      "**Data Set Characteristics:**\n",
      "\n",
      ":Number of Instances: 20640\n",
      "\n",
      ":Number of Attributes: 8 numeric, predictive attributes and the target\n",
      "\n",
      ":Attribute Information:\n",
      "    - MedInc        median income in block group\n",
      "    - HouseAge      median house age in block group\n",
      "    - AveRooms      average number of rooms per household\n",
      "    - AveBedrms     average number of bedrooms per household\n",
      "    - Population    block group population\n",
      "    - AveOccup      average number of household members\n",
      "    - Latitude      block group latitude\n",
      "    - Longitude     block group longitude\n",
      "\n",
      ":Missing Attribute Values: None\n",
      "\n",
      "This dataset was obtained from the StatLib repository.\n",
      "https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html\n",
      "\n",
      "The target variable is the median house value for California districts,\n",
      "expressed in hundreds of thousands of dollars ($100,000).\n",
      "\n",
      "This dataset was derived from the 1990 U.S. census, using one row per census\n",
      "block group. A block group is the smallest geographical unit for which the U.S.\n",
      "Census Bureau publishes sample data (a block group typically has a population\n",
      "of 600 to 3,000 people).\n",
      "\n",
      "A household is a group of people residing within a home. Since the average\n",
      "number of rooms and bedrooms in this dataset are provided per household, these\n",
      "columns may take surprisingly large values for block groups with few households\n",
      "and many empty houses, such as vacation resorts.\n",
      "\n",
      "It can be downloaded/loaded using the\n",
      ":func:`sklearn.datasets.fetch_california_housing` function.\n",
      "\n",
      ".. topic:: References\n",
      "\n",
      "    - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,\n",
      "      Statistics and Probability Letters, 33 (1997) 291-297\n",
      "\n",
      "--------------------------------------------------\n",
      "['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']\n"
     ]
    }
   ],
   "source": [
    "# Linear regression on the California housing data: load the dataset and inspect it.\n",
    "# data_home points scikit-learn's dataset cache at ./data.\n",
    "fe_cal = fetch_california_housing(data_home='data')\n",
    "separator = '-' * 50\n",
    "\n",
    "# Feature matrix: its shape, then the first sample as a sanity check\n",
    "print(\"获取特征值\")\n",
    "print(fe_cal.data.shape)\n",
    "print(separator)\n",
    "print(fe_cal.data[0])\n",
    "\n",
    "# Target: median house value in units of $100,000 (see the DESCR text printed next)\n",
    "print(\"目标值\")\n",
    "print(fe_cal.target)\n",
    "print(fe_cal.DESCR)\n",
    "print(separator)\n",
    "\n",
    "# Column names of the feature matrix\n",
    "print(fe_cal.feature_names)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.264648100Z",
     "start_time": "2024-07-13T02:44:48.200121500Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "- MedInc - 中位收入（Median Income）\n",
    "- HouseAge - 房屋年龄（House Age）\n",
    "- AveRooms - 平均房间数（Average Number of Rooms）\n",
    "- AveBedrms - 平均卧室数（Average Number of Bedrooms）\n",
    "- Population - 人口数量（Population）\n",
    "- AveOccup - 平均居住人数（Average Occupancy）\n",
    "- Latitude - 纬度（Latitude）\n",
    "- Longitude - 经度（Longitude）"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "data": {
      "text/plain": "(20640,)"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape of the target vector: one value per sample\n",
    "fe_cal.target.shape"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.276641100Z",
     "start_time": "2024-07-13T02:44:48.262656Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(15480, 8)\n"
     ]
    }
   ],
   "source": [
    "# Hold out 25% of the samples for testing; random_state pins the split\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    fe_cal.data, fe_cal.target, test_size=0.25, random_state=1\n",
    ")\n",
    "print(x_train.shape)\n",
    "\n",
    "# Standardize the features. The scaler is fitted on the training split only and\n",
    "# then applied to both splits, so no test-set statistics leak into training.\n",
    "std_x = StandardScaler().fit(x_train)\n",
    "x_train = std_x.transform(x_train)\n",
    "x_test = std_x.transform(x_test)\n",
    "\n",
    "# The target is left unscaled, so predictions and MSE stay in the dataset's\n",
    "# original units. If it is ever scaled, fit a second StandardScaler on\n",
    "# y_train.reshape(-1, 1) (scalers expect 2-D input) and inverse_transform the predictions."
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.356640300Z",
     "start_time": "2024-07-13T02:44:48.273642600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3,)\n"
     ]
    },
    {
     "data": {
      "text/plain": "(3, 1)"
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reshape(-1, 1): -1 lets NumPy infer that axis, so a length-3 vector becomes a (3, 1) column\n",
    "test1 = np.array([1, 2, 3])\n",
    "print(test1.shape)\n",
    "test1.reshape(-1, 1).shape"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.358640Z",
     "start_time": "2024-07-13T02:44:48.298878500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "回归系数 [ 0.83167028  0.12159502 -0.26758589  0.30983997 -0.00518054 -0.04040421\n",
      " -0.90736902 -0.88212727]\n",
      "正规方程测试集里面每个房子的预测价格： [2.12391852 0.93825754 2.7088455  1.70873764 2.82954754 3.50376456\n",
      " 3.0147162  1.62781292 1.74317518 2.01897806]\n",
      "正规方程的均方误差： 0.5356532845422556\n"
     ]
    }
   ],
   "source": [
    "# Ordinary least squares, solved directly (the print labels below call this the\n",
    "# normal-equation approach, as opposed to the iterative SGD fit later on).\n",
    "lr = LinearRegression()\n",
    "lr.fit(x_train, y_train)\n",
    "# The features were standardized, so coefficient magnitudes are comparable across features\n",
    "print('回归系数', lr.coef_)\n",
    "\n",
    "y_predict = lr.predict(x_test)\n",
    "\n",
    "# Persist the fitted estimator (its learned weights and its configuration).\n",
    "# Create the target directory if needed; joblib.dump overwrites an existing file,\n",
    "# so no prior delete is required and a fresh checkout no longer fails here.\n",
    "os.makedirs('./tmp', exist_ok=True)\n",
    "joblib.dump(lr, \"./tmp/test.pkl\")\n",
    "\n",
    "print(\"正规方程测试集里面每个房子的预测价格：\", y_predict[0:10])\n",
    "# Test-set loss: mean squared error = mean((y_test - y_predict) ** 2)\n",
    "print(\"正规方程的均方误差：\", mean_squared_error(y_test, y_predict))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.479442100Z",
     "start_time": "2024-07-13T02:44:48.306667700Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 2 加载保存的模型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "保存的模型预测的结果： [2.12391852 0.93825754 2.7088455  ... 1.24263061 2.73771901 1.75800594]\n",
      "正规方程的均方误差： 0.5356532845422556\n"
     ]
    }
   ],
   "source": [
    "# Simulate deployment: reload the persisted estimator and score the same test split.\n",
    "# NOTE: joblib.load unpickles the file, so only load model files you trust.\n",
    "model = joblib.load(\"./tmp/test.pkl\")\n",
    "\n",
    "# The target was never scaled in this notebook, so the predictions are already in\n",
    "# original units and need no inverse_transform.\n",
    "y_predict = model.predict(x_test)\n",
    "\n",
    "print(\"保存的模型预测的结果：\", y_predict)\n",
    "print(\"正规方程的均方误差：\", mean_squared_error(y_test, y_predict))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.487437300Z",
     "start_time": "2024-07-13T02:44:48.385109200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [
    {
     "data": {
      "text/plain": "0.375"
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tiny worked example of mean_squared_error on four samples\n",
    "y_true = [3, -0.5, 2, 7]\n",
    "y_pred = [2.5, 0.0, 2, 8]\n",
    "mean_squared_error(y_true=y_true, y_pred=y_pred)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.488437300Z",
     "start_time": "2024-07-13T02:44:48.408988400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [
    {
     "data": {
      "text/plain": "0.375"
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The same MSE by hand: one squared error per (true, predicted) pair, averaged over 4 samples\n",
    "(np.square(3 - 2.5) + np.square(-0.5 - 0.0) + np.square(2 - 2) + np.square(7 - 8)) / 4\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:44:48.489437600Z",
     "start_time": "2024-07-13T02:44:48.421562400Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 3 线性回归之梯度下降去进行房价预测"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "梯度下降的回归系数 [ 0.81355506  0.10499394 -0.23356786  0.31334747 -0.01303565 -0.01679472\n",
      " -0.9117157  -0.86697705]\n",
      "梯度下降的均方误差： 0.5355432396304047\n"
     ]
    }
   ],
   "source": [
    "# Linear regression fitted by stochastic gradient descent (suited to large datasets).\n",
    "# eta0 is the initial learning rate, penalty selects the regularization term\n",
    "# ('l1' or 'l2') and max_iter caps the number of passes over the training data.\n",
    "# The learning_rate argument selects the step-size schedule: 'constant',\n",
    "# 'invscaling' and 'adaptive' start from eta0, while 'optimal' derives the step\n",
    "# from alpha (the regularization strength) instead.\n",
    "# random_state pins the per-epoch shuffling so coefficients and MSE are reproducible.\n",
    "sgd = SGDRegressor(eta0=0.01, penalty='l2', max_iter=1000, random_state=1)\n",
    "sgd.fit(x_train, y_train)\n",
    "print('梯度下降的回归系数', sgd.coef_)\n",
    "\n",
    "# Score on the held-out split\n",
    "y_predict = sgd.predict(x_test)\n",
    "print(\"梯度下降的均方误差：\", mean_squared_error(y_test, y_predict))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:45:47.402855200Z",
     "start_time": "2024-07-13T02:45:47.353904300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "w -3.8999999999999995 损失20.71999999999999\n",
      "w 4.919999999999999 损失65.17279999999998\n",
      "w -10.955999999999996 损失209.19987199999983\n",
      "w 17.620799999999992 损失675.8475852799994\n",
      "w -33.817439999999976 损失2187.786176307197\n",
      "w 58.77139199999995 损失7086.4672112353155\n",
      "w -107.8885055999999 损失22958.193764402422\n",
      "w 192.09931007999978 损失74382.58779666382\n",
      "w -347.87875814399956 损失240997.6244611907\n",
      "w 624.0817646591992 损失780830.3432542577\n",
      "w -1125.4471763865586 损失2529888.352143795\n",
      "w 2023.7049174958051 损失8196836.300945894\n",
      "w -3644.768851492449 损失26557747.65506469\n",
      "w 6558.483932686408 损失86047100.4424096\n",
      "w -11807.371078835533 损失278792603.4734071\n",
      "w 21251.167941903957 损失903288033.2938386\n",
      "w -38254.202295427116 损失2926653225.912036\n",
      "w 68855.4641317688 损失9482356449.994993\n",
      "w -123941.93543718383 损失30722834896.023777\n",
      "w 223093.38378693088 损失99541985061.15703\n",
      "w -401570.1908164755 损失322516031596.1886\n",
      "w 722824.2434696557 损失1044951942369.6908\n",
      "w -1301085.73824538 损失3385644293275.837\n",
      "w 2341952.228841684 损失10969487510211.748\n",
      "w -4215516.1119150305 损失35541139533084.09\n",
      "w 7587926.901447055 损失115153292087190.52\n",
      "w -13658270.5226047 损失373096666362495.4\n",
      "w 24584884.840688456 损失1208833199014482.5\n",
      "w -44252794.81323922 损失3916619564806921.0\n",
      "w 79655028.56383058 损失1.268984738997442e+16\n"
     ]
    }
   ],
   "source": [
    "# Gradient descent on f(w) = 2w^2 + 3w + 2, starting from w = 1.\n",
    "# Each update is w <- (1 - 4*alpha)*w - 3*alpha, so it converges (to w = -0.75)\n",
    "# only for 0 < alpha < 0.5. With alpha = 0.7 the factor is -1.8: w flips sign and\n",
    "# grows every step, i.e. this learning rate is too large and the iteration diverges.\n",
    "w = 1\n",
    "alpha = 0.7\n",
    "\n",
    "\n",
    "def loss(w):\n",
    "    \"\"\"Objective being minimized: 2*w**2 + 3*w + 2.\"\"\"\n",
    "    return 2 * w ** 2 + 3 * w + 2\n",
    "\n",
    "\n",
    "def dao_shu(w):\n",
    "    \"\"\"Derivative of loss with respect to w: 4*w + 3.\"\"\"\n",
    "    return 4 * w + 3\n",
    "\n",
    "\n",
    "for i in range(30):\n",
    "    w -= alpha * dao_shu(w)\n",
    "    print(f'w {w} 损失{loss(w)}')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-13T02:25:33.044037500Z",
     "start_time": "2024-07-13T02:25:33.035202800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 4 岭回归"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.83166963  0.12159681 -0.26758236  0.30983534 -0.00517992 -0.04040432\n",
      " -0.90735215 -0.88211025]\n",
      "(5160,)\n",
      "岭回归的均方误差： 0.5356531179270396\n"
     ]
    }
   ],
   "source": [
    "# Ridge regression: least squares plus an L2 penalty on the squared coefficients.\n",
    "# alpha is the penalty strength; the added term also keeps the normal-equation\n",
    "# matrix invertible.\n",
    "rd = Ridge(alpha=0.02).fit(x_train, y_train)\n",
    "print(rd.coef_)\n",
    "\n",
    "# Predict once and reuse the result for both the shape check and the error\n",
    "y_predict = rd.predict(x_test)\n",
    "print(y_predict.shape)\n",
    "print(\"岭回归的均方误差：\", mean_squared_error(y_test, y_predict))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:53:07.133036500Z",
     "start_time": "2024-07-13T02:53:07.108642500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(15480, 8)\n",
      "(15480,)\n",
      "[ 0.82655827  0.1225482  -0.25369194  0.29596304 -0.00381001 -0.03948424\n",
      " -0.89646842 -0.87060253]\n",
      "(5160,)\n",
      "--------------------------------------------------\n",
      "Lasso回归的均方误差： 0.5356324125105497\n"
     ]
    }
   ],
   "source": [
    "# Lasso regression: least squares with an L1 penalty; alpha is the penalty strength.\n",
    "print(x_train.shape)\n",
    "print(y_train.shape)\n",
    "ls = Lasso(alpha=0.001).fit(x_train, y_train)\n",
    "print(ls.coef_)\n",
    "\n",
    "# Predict once and reuse the result for both the shape check and the error\n",
    "y_predict = ls.predict(x_test)\n",
    "print(y_predict.shape)\n",
    "print('-' * 50)\n",
    "print(\"Lasso回归的均方误差：\", mean_squared_error(y_test, y_predict))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-13T02:53:11.006314300Z",
     "start_time": "2024-07-13T02:53:10.574525600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 5 逻辑回归"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "-0.6931471805599453"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Natural log of 0.5; its negation (about 0.693) is the value of -ln(p) at p = 0.5\n",
    "np.log(0.5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-19T02:53:32.667592500Z",
     "start_time": "2024-04-19T02:53:32.602630600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "-1.2039728043259361"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Natural log of 0.3; its negation (about 1.204) is the value of -ln(p) at p = 0.3\n",
    "np.log(0.3)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-19T02:54:58.294848500Z",
     "start_time": "2024-04-19T02:54:58.241875200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAc80lEQVR4nO3deXScd33v8fdXGs1IM9pX25Jl2Y7teCFeIpKYkIXQQBKycCmEsJdyawKltxw4t7cczr29LW1PbzlQlqZcXODSsgQStpOGNIEkTuwQsshLvMW2vEuytdna9+V3/5iRIq8a25p5ntF8XufM0SPNY+n7y9if/PSd3+95zDmHiIj4V4bXBYiIyMUpqEVEfE5BLSLicwpqERGfU1CLiPhcIBHftLS01NXU1CTiW4uIzEpbt25td86Vne+5hAR1TU0NdXV1ifjWIiKzkpkdu9Bzan2IiPicglpExOemDWozW2ZmO6Y8us3ss0moTUREiKNH7ZzbD6wBMLNMoAn4ZWLLEhGRCZfa+ng7cMg5d8Gmt4iIzKxLDeoHgIcTUYiIiJxf3EFtZkHgXuDRCzy/wczqzKyura1tpuoTEUl7lzKjvhPY5pxrOd+TzrmNzrla51xtWdl512xP6xvP1PP8AYW8iMhUlxLUHyDBbY9vP3+IzQpqEZEzxBXUZhYBbgd+kchiwqEA/cNjifwRIiIpJ64t5M65PqAkwbUQDmbSPzya6B8jIpJSfLUzMRzUjFpE5Gw+C+pMBhTUIiJn8F1Q96n1ISJyBt8FtWbUIiJn8llQBzSjFhE5i8+CWjNqEZGz+S6o+4YU1CIiU/ksqAMMjIwxPu68LkVExDd8FtSZAAyMaFYtIjLBX0Edim6U1KYXEZE3+Cuos6Izam0jFxF5g6+COhKaCGrNqEVEJvgqqHOCan2IiJzNV0E98WaiWh8iIm/waVBrRi0iMsFnQT3R+tCMWkRkgq+COqIZtYjIOXwV1DkTQa1t5CIik3wV1GGt+hAROYevgjozwwgFMtSjFhGZIt67kBea2c/MbJ+ZvW5m6xNVUER3IhcROUNcdyEHvg486Zx7r5kFgXCiCsrJ0u24RESmmjaozawAuBn4IwDn3DAwnKiCIiHdPEBEZKp4Wh8LgTbg/5nZdjP7jplFzj7JzDaYWZ2Z1bW1tV12QTlBtT5ERKaKJ6gDwDrgW865tUAf8Jdnn+Sc2+icq3XO1ZaVlV12QeGsTL2ZKCIyRTxB3Qg0Oudejn3+M6LBnRCRUKZm1CIiU0wb1M65ZqDBzJbFvvR2YG+iClLrQ0TkTPGu+vgz4EexFR+HgY8nqqBIUK0PEZGp4gpq59wOoDaxpUTlBDO1hVxEZApf7UwEiAQD9I+M4ZzuRC4iAj4M6pxgJmPjjqHRca9LERHxBd8F9cSlTrXpRUQkyndBPXEFPW0jFxGJ8l1Q52hGLSJyBt8FdSSku7yIiEzlu6DOyVLrQ0RkKt8F9cSMWq0PEZEo3wV1ONaj7lNQi4gAvgzqaOtjQK0PERHAl0Edm1FrG7mICODLoI7NqEcU1CIi4MOgDgYyCGQYfUNqfYiIgA+DGmJX0NObiSIigE+DOhIM6JrUIiIxvgzqsGbUIiKT/BnUoUxteBERifFnUGcFtIVcRCTGn0GtGbWIyCR/BnUwU1vIRURi4rq5rZkdBXqAMWDUOZfQG92GgwHNqEVEYuIK6pi3OefaE1bJFNEZtXrUIiLg09aHNryIiLwh3qB2wG/MbKuZbTjfCWa2wczqzKyura3tioqKBAMMj44zOqY7kYuIxBvUb3XOrQPuBP7UzG4++wTn3EbnXK1zrrasrOyKipq4gl6/LswkIhJfUDvnmmIfW4FfAtclsqg3rkmtoBYRmTaozSxiZnkTx8A7gN2JLOqNa1LrDUURkXhWfVQAvzSzifN/7Jx7MpFFTbY+NKMWEZk+qJ1zh4HVSahl0kTrQ0EtIuLT5Xnh0MSMWq0P
ERF/BrVaHyIik/wZ1FlqfYiITPBnUKv1ISIyyZ9BrdaHiMgkXwZ1diATM+jXOmoREX8GdUaGkZOlCzOJiIBPgxqia6l18wARER8HdUV+iBOdA16XISLiOd8G9dKKPOpberwuQ0TEc74N6qvKcznRNUjP4IjXpYiIeMq3Qb20Ig+Ag629HlciIuIt3wb1kvJcAOoV1CKS5nwb1POLw4QCGepTi0ja821QZ2YYi8tyNaMWkbTn26AGWFqRS32LglpE0puvg3pJRR5NnQP0aiu5iKQxfwd17A1FrfwQkXTm76COLdHTG4oiks7iDmozyzSz7Wb2eCILmqq6OEwwkKE3FEUkrV3KjPrPgdcTVcj5TK780IxaRNJYXEFtZlXAu4DvJLaccy2tyOWAVn6ISBqLd0b9NeAvgPHElXJ+S8pzaeocoE8rP0QkTU0b1GZ2N9DqnNs6zXkbzKzOzOra2tpmrMAluuaHiKS5eGbUNwL3mtlR4CfAbWb2w7NPcs5tdM7VOudqy8rKZqxAXfNDRNLdtEHtnPuCc67KOVcDPAA865z7cMIri6kuDhPM1DU/RCR9+XodNUAgM4OrynPZc6Lb61JERDxxSUHtnHvOOXd3ooq5kBsWlfDq0dMMjugeiiKSfnw/owa4eWkpQ6PjvHzktNeliIgkXUoE9fULSwgGMthyYOZWk4iIpIqUCOqcYCbX1RSzuV5BLSLpJyWCGuCmJaUcaOmluWvQ61JERJIqZYL65qXRtdmaVYtIukmZoL56Th5leSG21Ld7XYqISFKlTFCbGTctKeWF+jbGxp3X5YiIJE3KBDXAzUvK6OgfYc+JLq9LERFJmpQK6rcuKQVgs5bpiUgaSamgLs0NsXJePs/sa/W6FBGRpEmpoAa4Z/U8th/v5FCbrqYnIukh5YL6PesqycwwHq1r9LoUEZGkSLmgLs/L5m3Lyvn5tkZGx5J+wxkRkaRLuaAGuL+2iraeIZ7Xm4oikgZSMqjfdnU5pbkhfvpqg9eliIgkXEoGdVZmBn+4rpJn97XS1jPkdTkiIgmVkkEN8L7aKkbHHb/a3uR1KSIiCZWyQX1VeR7rqgt5+JXj2lIuIrNaygY1wH+9aRGH2/t4YtdJr0sREUmYlA7qO1bOYUl5Lt98tp5xzapFZJaaNqjNLNvMXjGz18xsj5n9dTIKi0dGhvGZ267iQEsvT+1p9rocEZGEiGdGPQTc5pxbDawB7jCzGxJa1SW4+5p5LCqN8PVnNKsWkdlp2qB2URMX1siKPXyTiJmxWfW+5h6efr3F63JERGZcXD1qM8s0sx1AK/Bb59zL5zlng5nVmVldW1tydwzeu3oeC0rCmlWLyKwUV1A758acc2uAKuA6M1t1nnM2OudqnXO1ZWVlM1zmxQUyM/jc7UvZc6Kbn9Zpt6KIzC6XtOrDOdcJbALuSEg1V+De1fO4bmEx//jkPjr6hr0uR0RkxsSz6qPMzApjxznA7cC+BNd1ycyMv7lvJd2Do3z5N/u9LkdEZMbEM6OeC2wys53Aq0R71I8ntqzLc/WcfD62voaHXznOzsZOr8sREZkR8az62OmcW+ucu8Y5t8o59zfJKOxyffb2JZREQvzPX+3W9apFZFZI6Z2J55OfncX/umcFrzV28a3nDnldjojIFZt1QQ3RNxbvXT2Prz1Tz46GTq/LERG5IrMyqAG+9O5VzMnP5rM/2U7f0KjX5YiIXLZZG9QFOVl85f7VHDvdz5ce3+t1OSIil23WBjXADYtKePCWxfzk1QYe1UYYEUlRszqoAT5/+1LesriEL/5yN9uOd3hdjojIJZv1QR3IzOChD65jTkE2n/zBVpq7Br0uSUTkksz6oAYoigT514/W0jc0yid/UMfA8JjXJYmIxC0tghpg2Zw8/un9a9jZ1MWnf7SVEW2GEZEUkTZBDfDOlXP423evYtP+Nv7iZzt1SVQRSQkBrwtItg9dv4DO/hG+/NR+CnKy+Kt7VmBmXpclInJBaRfU
AJ++dTGn+4b57gtHCAYy+MKdVyusRcS30jKozYwv3rWckbFxNm4+zPDouGbWIuJbaRnUEL2D+V/fu5JgZgbfeeEIQ6Pj/N27V5GRobAWEX9J26CG2Mz6XcsJBjL4l+cO0T0wwlfuX012VqbXpYmITErroIZoWP/3dy6jMJzF3z+xj9aeQf71o7UUhoNelyYiAqTZ8rwLMTM23LyYb35gLa81dPGeb73IsVN9XpclIgIoqM9wz+p5/OAT13Gqd5h7//l3bKlv87okEREF9dmuX1TCY5+5kbkF2Xzse6+wcfMhnNPGGBHxjoL6PBaURPj5p97CHavm8PdP7OPTP9pG18CI12WJSJqaNqjNbL6ZbTKzvWa2x8z+PBmFeS0SCvDQB9fxhTuv5rd7W3jXN7botl4i4ol4ZtSjwOedcyuAG4A/NbMViS3LH8yMT96ymEceXI9z8N5vvci/PHeQMV0jRESSaNqgds6ddM5tix33AK8DlYkuzE/WVRfxxH+7iXesrOAfn9zP/d/+PUfbtSpERJLjknrUZlYDrAVePs9zG8yszszq2tpm32qJgnAWD31wHV9/YA31LT3c+fUtfP93RzS7FpGEs3hXNJhZLvA88HfOuV9c7Nza2lpXV1c3A+X5U3PXIP/j5zt5/kAba6sL+Yf3XMOyOXlelyUiKczMtjrnas/3XFwzajPLAn4O/Gi6kE4Hcwqy+f7H38zX3r+Go+193P3NLXz5qX30D496XZqIzELxrPow4LvA6865rya+pNRgZrx7bSXPfP5W7rlmHg9tOsTtX93Mk7tPat21iMyoeGbUNwIfAW4zsx2xx10JritlFEeCfPX9a3jkk+vJyw7w4A+38ZHvvsK+5m6vSxORWSLuHvWlmO096gsZHRvnhy8d45+erqdncIT7a+fzuXcspTwv2+vSRMTnLtajVlAnQGf/MN989iD/9uJRgoEM/vjGhWy4ZRH52VlelyYiPqWg9sjR9j6+8tsD/MdrJyjIyeJTty7mo+sXEA6m/dVlReQsCmqP7W7q4stP7ef5A22URIJsuHkRH1Fgi8gUCmqf2HrsNF97up4t9e2URIL88VsX8uEbFlCQo5aISLpTUPvM1mOn+ednD7Jpfxt5oQAfXr+Aj7+lhvJ8vekokq4U1D61u6mLbz13iCd2nySQYdy3ppI/uWmRdjmKpCEFtc8dO9XHd184wiN1DQyOjHPjVSV8/C0Lue3qct0VXSRNKKhTREffMD9+5Tg/+P0xmrsHWVAS5kPXV/O+a+dTFNHNdkVmMwV1ihkZG+c3e1r4/otHePVoB8FABndfM5cPXV/Nuuoiorv6RWQ2UVCnsH3N3fzopeP8YlsjfcNjLCnP5YHrqvkvaysp1ixbZNZQUM8CfUOjPL7zBA+/0sCOhk6yMo23X13B+2qruGVpGYFM3f5SJJUpqGeZfc3dPFrXyK+2N3Gqb5jS3CD3rJ7He9ZWsaoyX60RkRSkoJ6lhkfHeW5/K7/c3sQzr7cyPDbO4rII962p5N7V86gpjXhdoojESUGdBjr7h/n1rpM8tuMELx85DcCbKgu4+5q53PWmucwvDntcoYhcjII6zZzsGuA/XjvB4ztPsrOxC4DV8wu5c9Uc7lw1hwUlmmmL+I2COo0dP9XP47tO8J+7mtnVFA3t5XPzeefKCt6xYg7L5+appy3iAwpqAaDhdD9P7m7mqT3NbD3egXMwvziHP1hewe3LK3jzwmKytHpExBMKajlHW88QT7/ewtN7W3jhYDtDo+PkZQe4eWkZty0r59ZlZZTkhrwuUyRtKKjlovqHR9lS386mfa08s6+Vtp4hzOCaygJujYX2NVWFZOq6IyIJc0VBbWbfA+4GWp1zq+L5gQrq1DU+7thzoptN+1vZtL+VHQ2dOAeF4SxuvKqUW5aU8dYlpcwrzPG6VJFZ5UqD+magF/h3BXX66egbZsvBdjYfaGPzgTZae4YAWFQa4a1LSnnL4lLWLyqhIKybH4hc
iStufZhZDfC4gjq9Oec40NLLlvo2XjjYzsuHTzMwMkaGwarKAtYvLmH9ohLeXFNMJKTbjIlcCgW1JMTw6Djbj3fw4qFTvHionR0NnYyMOQIZxpuqCrhhUQnXLyymtqaYXAW3yEUlJajNbAOwAaC6uvraY8eOXV61krL6h0fZeiwa3C8fPsXOxi5Gx93kjPu6mmLevLCY2gVFWlEichbNqMUTE8H96pHTvHzkNDsaOhkaHQdgUVmE2gVF1C4oZt2CIhaXRbTxRtLaxYJav49KwoSDAW5aUsZNS8oAGBodY3dTF68ejYb3b/a28EhdIwBF4SzWVhdx7YIi1lYXsrqqUH1ukZh4Vn08DNwKlAItwF855757sT+jGbXEY3zccbi9l7qjHWw73sG2450cbO0FIMNgaUUea6sLWTO/kDXzi7iqPFdruWXW0oYXSRld/SNsb+hg+/FOth3v4LWGTroHRwGIBDNZWVnAmvmFXFNVwOqqQqqKctQykVlBrQ9JGQXhrNhuyHIguiTwSHsfOxo62dnYxY6GTr7/u6MMj0V73YXhLN5UWTD5WFVZoPCWWUdBLb5mZiwqy2VRWS7vWVcFRJcFHmjp4bXGTnY1drGzsYuNmw8zOh797bAwnMWqeQWsrMxn5bwCVs7LZ2FJhAy1TSRFKagl5QQDGayKzZ65Pvq1wZEx9jX3sKupi70nutjd1M33XjjCyFg0vMPBTJbPzWf53DxWzC1g+dw8rp6TT04w08ORiMRHPWqZtYZHx6lv7WHPiW72TjxOdtM7FO15m8HC0gjL5+RPBvfVc/OoLFTrRJJPPWpJS8FARqz1UTD5tfFxR2PHAHtPdvN67LGrqYtf7zo5eU5eKMCyOXmTj6UVeSyryKMoEvRiGCKaUYsA9A6Nsr+5m33NPexv7mHfyR72NXdPrjgBKMsLsawijyUVuSytyGNJeS5LKvIoyNEFqeTKaUYtMo3cUIBrFxRz7YLiya8552jpHmJfczf1Lb3sb+nhQEsPP3mlgYGRscnzyvNCLK3I46ry3DMeJZGgWigyIxTUIhdgZswpyGZOQfbkckGItk+aOgc40NJDfWsv9S291Lf28EhdA/3DbwR4YTiLq8pyWVwWDe7F5REWleZSVZRDQLc8k0ug1ofIDHHOcaJrkIOtvZOPQ629HGzr5XTf8OR5wcwMFpSEWVQWiS49LH3jo/rg6UutD5EkMDMqC3OoLMzhlqVlZzzX0TfM4fZeDrX2cSj2sb61l2deb51c/w3RWfjC0ggLSyMsKo1QUxqhpiT6UZeKTV965UWSoCgS5NrImT1wgNGxcRo6Bjjc1suR9j6OtPdxuK2P3x86xS+2NZ1xblleiJqS8GRwL4gdV5eEyc/WG5qzmYJaxEOBzIzJGfTZ+odHOdrez9FTfdFHex9HT/Xz/IE2Ht3aeMa5xZEgC0rCLCgOU10SiX2Mfl6WF9KbmilOQS3iU+FggBXz8lkxL/+c5/qGRjl+up9jp6LhfexU9PjVox089toJpnRTyM7KoLo4THVxmPkTH4uix/OLcwgHFQN+p1dIJAVFQoHYlvhzQ3x4dJzGjn6One6n4XQ/x0+9cfz7Q6fom7IyBaAkEqSqOMz8opxoeBeFqYodzyvMJhTQNnuvKahFZplgIGPyQlZnc85xum+Yho4BjsfCu+F0P40dA+xq6uLJ3c1nvLkJUJEforIwh6qiMJVFOVQV5cQ+z6GyMKzrpSSBglokjZgZJbkhSnJDrJlfeM7zY+OOlu7BaIB3DNDUMUBjRzTIdzR08sSuk+cEeUkkyLzYapd5hTnMK8ymqmjiOEcbf2aAglpEJmVm2GTAXn+e5yeCvKkzGuJNnQOTxwfbenn+QNsZuzYBQoGMyQCfW5DDvIJs5hbmMLcgm3mxj3latXJRCmoRidvUIH9zzbnPO+fo7B+hqXOAE7FHU+cAJ7oGOdk5wAv17bT2DHLWpJy8UIC5sSCfWzDlY2E2cwuyqchP7zBXUIvI
jDEziiJBiiLB6PXCz2NkbJyW7kFOdg1yonOAk7EQP9kV/dqeE1209w6f8+dyQ4Holv787PN+rMjPpiQSnJU3iFBQi0hSZWVmUFUUpqoofMFzhkbHaO0eioX3wGSwn+wcpLl78IIz80CGUZ4XoiIW4BWTjxBz8rMpjx3nhgIp1TePK6jN7A7g60Am8B3n3D8ktCoRSWuhQGZsnfeFw3x0bJz23mGauwdp7hqktSf6sbl7kJbuQQ609LClvn3yRhFThYOZVORnU5YXigZ5Xojy/NDk18rzsinPD5Hnk0CfNqjNLBN4CLgdaAReNbPHnHN7E12ciMiFBDIzJq9uyPwLn9c3NEpLdzTA23qGosddQ7T2DNLaPcRrDZ209gwyODJ+zp/NzsqIhnYsyMvzJoI8RFnsUZ6XTXEkSGYCWy7xzKivAw465w4DmNlPgPsABbWI+F4kFLjguvIJzjl6hkZp7Y6Gd2vPG0E+cbyvOTpD7xk8d4aeYVCSG2JhSYRHHlw/42OIJ6grgYYpnzfCuSt3zGwDsAGgurp6RooTEUkGMyM/O4v87CyuKs+76LkDw2O090bDu60nGuRtsUeiuiQz9maic24jsBGi16Oeqe8rIuInOcHp++czLZ7bTDRxZgeoKvY1ERFJgniC+lVgiZktNLMg8ADwWGLLEhGRCdO2Ppxzo2b2GeAposvzvuec25PwykREBIizR+2cewJ4IsG1iIjIeehWyCIiPqegFhHxOQW1iIjPKahFRHzOnJv5vSlm1gYcu4w/Wgq0z3A5qSAdx52OY4b0HHc6jhkufdwLnHNl53siIUF9ucyszjlX63UdyZaO407HMUN6jjsdxwwzO261PkREfE5BLSLic34L6o1eF+CRdBx3Oo4Z0nPc6ThmmMFx+6pHLSIi5/LbjFpERM6ioBYR8TlPgtrM7jCz/WZ20Mz+8jzPh8zsp7HnXzazGg/KnHFxjPtzZrbXzHaa2TNmtsCLOmfSdGOect4fmpkzs1mxjCuecZvZ/bHXe4+Z/TjZNc60OP5+V5vZJjPbHvs7fpcXdc4kM/uembWa2e4LPG9m9o3Yf5OdZrbusn6Qcy6pD6KXSj0ELAKCwGvAirPO+TTwf2PHDwA/TXadHo37bUA4dvypVB93PGOOnZcHbAZeAmq9rjtJr/USYDtQFPu83Ou6kzDmjcCnYscrgKNe1z0D474ZWAfsvsDzdwH/CRhwA/Dy5fwcL2bUkzfLdc4NAxM3y53qPuDfYsc/A95ufrhn+5WZdtzOuU3Ouf7Ypy8RvZtOKovntQb4EvB/gMFkFpdA8Yz7T4CHnHMdAM651iTXONPiGbMD8mPHBcCJJNaXEM65zcDpi5xyH/DvLuoloNDM5l7qz/EiqM93s9zKC53jnBsFuoCSpFSXOPGMe6pPEP0/cSqbdsyxXwXnO+d+nczCEiye13opsNTMfmdmL5nZHUmrLjHiGfP/Bj5sZo1Er2//Z8kpzVOX+u/+vGbs5rYyc8zsw0AtcIvXtSSSmWUAXwX+yONSvBAg2v64lehvTpvN7E3OuU4vi0qwDwDfd859xczWAz8ws1XOuXGvC/M7L2bU8dwsd/IcMwsQ/TXpVFKqS5y4bhJsZn8AfBG41zk3lKTaEmW6MecBq4DnzOwo0R7eY7PgDcV4XutG4DHn3Ihz7ghwgGhwp6p4xvwJ4BEA59zvgWyiFy6azWbk5uBeBHU8N8t9DPhY7Pi9wLMu1plPYdOO28zWAt8mGtKp3rOEacbsnOtyzpU652qcczVE+/L3OufqvCl3xsTzd/xXRGfTmFkp0VbI4STWONPiGfNx4O0AZracaFC3JbXK5HsM+Ghs9ccNQJdz7uQlfxeP3im9i+gM4hDwxdjX/oboP1KIvoCPAgeBV4BFXr+7m6RxPw20ADtij8e8rjnRYz7r3OeYBas+4nytjWjbZy+wC3jA65qTMOYVwO+IrgjZAbzD65pnYMwPAyeBEaK/JX0CeBB4cMrr/FDs
v8muy/37rS3kIiI+p52JIiI+p6AWEfE5BbWIiM8pqEVEfE5BLSLicwpqERGfU1CLiPjc/weP2hhInFjrVwAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot -ln(x) on (0, 1). The grid starts at 0.001 rather than 0 because log(0) is -inf;\n",
    "# the step is 0.01.\n",
    "x = np.arange(0.001, 1, 0.01)\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(x, -np.log(x))  # np.log is the natural (base-e) logarithm\n",
    "ax.set(title='-ln(x)', xlabel='x', ylabel='-ln(x)')\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-19T02:56:32.623172500Z",
     "start_time": "2024-04-19T02:56:31.191990900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcVklEQVR4nO3deXSc1Z3m8e/Vvu+lxbJkWbblFbwgGwMJEBOCQwhLJw0kgZANk2SyTk8zk5NeMqRzTmY63ZA+HToh7GmapZMOMSHgAMEBDLax402SLduS5UVLaa/SYq1154+SHMPYVsmuqrdK9XzOqeOS66Xe31VJD9f3ve+9xlqLiIhErjinCxARkXNTUIuIRDgFtYhIhFNQi4hEOAW1iEiESwjFmxYUFNiKiopQvLWIyIy0c+fOTmut60yvhSSoKyoq2LFjRyjeWkRkRjLGHD3baxr6EBGJcApqEZEIp6AWEYlwCmoRkQinoBYRiXAKahGRCKegFhGJcApqEZEgeKXOzU//2BCS91ZQi4gEwcs1bTz5dlNI3ltBLSISBG7vEIVZKSF5bwW1iEgQuL1DFCuoRUQiV5t3iKKs5JC8t4JaROQCDY6M0Tc0RlG2etQiIhHJ7R0GoChTQS0iEpHc3iEAitWjFhGJTJNBrTFqEZEI9eegVo9aRCQiub3DpCXFk5Eckk2zFNQiIhfKPzUvBWNMSN5fQS0icoHaQziHGhTUIiIXbLJHHSoKahGRC2Ctxe0dDtnt46CgFhG5IL2Do4yM+UK2IBMoqEVELoi7b+JmFwW1iEhkOnX7uC4miohEJrcntDe7gIJaROSCTN6VWKgetYhIZGrzDpGXnkRyQnzIzqGgFhG5AG7vMIWZoetNg4JaROSCuL1DIVvedJKCWkTkAri9QyHbMGBSwEFtjIk3xuwyxvw2lAWJiESLsXEfnf3DIduCa9J0etTfBPaHqhARkWjT2T+Cz4Z2DjUEGNTGmNnAx4CHQ1qNiEgUaZvcMCBChj4eAO4FfGc7wBizwRizwxizo6OjIxi1iYhEtFDvlThpyqA2xtwAtFtrd57rOGvtQ9baamtttcvlClqBIiKRqj0MN7tAYD3qK4AbjTFNwDPAOmPMv4e0KhGRKNDmHSI+zlCQ7nBQW2u/Y62dba2tAG4H/mCtvSOkVYmIRIE2j/9ml7i40GzBNUnzqEVEzlNT1wBleWkhP8+0gtpau9lae0OoihERiRbWWurb+lhUnBnyc6lHLSJyHk70nKR/eIxFxVkhP5eCWkTkPNS39QGwUD1qEZHIVO9WUIuIRLT9rV5m56aSkZwQ8nMpqEVEzkO4LiSCglpEZNqGx8Zp7BwIy4VEUFCLiExbQ/sA4z4blvFpUFCLiEzbgTYvgIY+REQiVX1bH0nxcVQUpIflfApqEZFpOtDWx7zCDBLjwxOhCmoRkWmqb+tjcZiGPUBBLSIyLb2DI7R5h8J2IREU1CIi03IgjLeOT1JQi4hMw+QaH+GaQw0KahGRaTnQ1kd2amLIdx4/nYJaRGQaapo9LC7JxJjQ7upyOgW1iEiAvEOj1LZ4WDM3P6znVVCLiARoZ1MPPgtr5+aF9bwKahGRAG1t7CIpPo6V5blhPa+CWkQkQFsbu1hRlkNqUnxYz6ugFhEJQN/QKPuaPVxaGd5hD1BQi4gEZMfRifHpyvBeSAQFtYhIQLY2dpEYb1gV5vFpUFCLiARka2O3I+PToKAWEZlS//AYNc0eLg3z/OlJCmoRkSnsaOpm3GcdGZ8GBbWIyJS2Nnb7x6fn5DhyfgW1iMgU3mno5OLZOaQlJThyfgW1iMg5tHmG2HPCw4cWuhyrQUEtInIOv69rA2D9shLHalBQi4icw8s1bcwvzGB+YYZjNSioRUTOontghG1Hulm/tNjROhTUIiJn8Wqdm3GfZf0yBbWISER6ubaN0pxUls4K3/6IZzJlUBtjUowx240xe4wxtcaY/x2OwkREnNQ3
NMpbhzpZv6w4rNtunUkgkwKHgXXW2n5jTCLwljHmJWvt1hDXJiLimNfrOxgZ9/FRh4c9IICgttZaoH/iy8SJhw1lUSIiTttU04YrM9mR1fLeL6AxamNMvDFmN9AOvGKt3XaGYzYYY3YYY3Z0dHQEuUwRkfDxDo3y2gE31y0tIi7O2WEPCDCorbXj1toVwGxgjTFm2RmOechaW22trXa5nLuDR0TkQv1mdwtDoz5urS5zuhRgmrM+rLW9wOvA+pBUIyISAZ7ZfozFJVlcVJrtdClAYLM+XMaYnInnqcC1wIEQ1yUi4oiaZg+1LV4+tabM8dkekwKZ9VECPGGMiccf7M9Za38b2rJERJzx9PZjJCfEcdPyUqdLOSWQWR97gZVhqEVExFGDI2Ns3N3Cxy4qITst0elyTtGdiSIiE17c20rf8Bi3rY6Mi4iTFNQiIhOeefc4lQXprJmb53Qp76GgFhEB9hzvZefRHj59aXnEXEScpKAWEQF++scGMlMSIm7YAxTUIiI0dvTzcm0bd66dQ2ZK5FxEnKSgFpGY9/M3G0mMj+PzV8x1upQzUlCLSExr9w7xq53NfPKS2bgyk50u54wU1CIS0x7d0sSYz8eGD1Y6XcpZKahFJGZ5Bkd5autRPrqshIqCdKfLOSsFtYjErAf/eJj+kTG+tm6+06Wck4JaRGJSq+ckj29p4pYVpSwucXZPxKkoqEUkJj3wyiGshW9fW+V0KVNSUItIzDnc3sd/7jzOZ9aWU5aX5nQ5U1JQi0jM+cdN9aQlJfC1D0X22PQkBbWIxJStjV1sqnVz9wcryc+IzHnT76egFpGYMTLm42+fr2F2biobrozcedPvF8gOLyIiM8KjW45wqL2fR+6qJjUp3ulyAqYetYjEhJbek/z41UN8eHER1ywucrqcaVFQi0hMuO+FOiyWv//4EqdLmTYFtYjMeJtq23i5to2vr1sQFdPx3k9BLSIzWlf/MN/99T6Wzsri7gheeOlcdDFRRGYsay1/83wN3pNj/PuXlpOUEJ190+isWkQkABv3tPBSTRvfvraKRcWRvZ7HuSioRWRGavMM8Xe/qWVleU5UzZk+EwW1iMw4Y+M+vvHMLkbHffzTXy4nPi6ydhWfLo1Ri8iMc/+rB9l+pJsHbltBpSvD6XIumHrUIjKj/PFgBz95vYHbV5dx88pSp8sJCgW1iMwYrZ6TfPvZ3SwqzuR7Ny51upygUVCLyIxwcmScDU/uZHh0nH/99CpSEqNnLY+paIxaRKKetZZ7f7WXmhYPP7+zmvmF0T8ufTr1qEUk6j24uYEX9rTw19ct5MNLomvBpUAoqEUkqr1c08o/bqrn5hWz+MpV85wuJyQU1CIStbYf6eYbz+xmVXkOP/zExRgT3fOlz0ZBLSJR6aC7jy898S6zc1N55K7VM+ri4ftNGdTGmDJjzOvGmDpjTK0x5pvhKExE5GxaPSe569HtJCfG88Tn15CbnuR0SSEVyKyPMeCvrLV/MsZkAjuNMa9Ya+tCXJuIyP+nvW+Iz/x8G31DYzx7z9qoXF96uqbsUVtrW621f5p43gfsB2bG7T4iElW6B0a44+FttHqGeOzzq1k6K9vpksJiWmPUxpgKYCWw7QyvbTDG7DDG7Ojo6AhSeSIifp7BUe58ZBtHuwZ55K5qVlfkOV1S2AQc1MaYDOBXwLestd73v26tfchaW22trXa5XMGsUURiXM/ACJ95ZCuH3P387M5LuHx+gdMlhVVAdyYaYxLxh/RT1tr/Cm1JIiJ/1tE3zJ2PbKOxc4Cf3XkJVy8sdLqksJsyqI1/YuIjwH5r7T+HviQREb82zxCffngrrb1DPPa51VwRYz3pSYEMfVwB3AmsM8bsnnhcH+K6RCTGNXT084l/e5t27zBPfnFNzIY0BNCjtta+BczM231EJCLtOtbDFx5/l/g4w9N3r+Wi2bExu+NstHqeiESUPxxw89+e2kVhVjJPfmENc/LTnS7JcQpqEYkYj285
wn2/rWPJrCwe+9waXJnJTpcUERTUIuK4sXEf9/22jiffOcq1S4p44LYVpCcrnibpOyEijuodHOHrT+/izUOd3HNlJfeuXxT1u4YHm4JaRByzv9XLhl/swO0Z5v9+4mJuXV3mdEkRSUEtIo7YuKeF//nLvWSlJvDsPWtZWZ7rdEkRS0EtImE1PDbOP/x2P7/YepTqObk8eMcqCjNTnC4roimoRSRsjnUN8tX/2ElNs5cNV1by19ctJDFe+5dMRUEtImHxm93N/M2va4iLMzz82eoZuQltqCioRSSk+oZG+bvf1PLrXc1Uz8nl/ttWxMRi/8GkoBaRkNnW2MX/+OUemntO8q0PL+BrH5pPgoY6pk1BLSJBNzQ6zo821fPIliOU5abx3D2XUR1DC/0Hm4JaRIJq59Ee7v3lHho6BrhjbTnf+ehi3WV4gfTdE5GgGBge40e/r+fxt5soyUrhyS+s4coq7fYUDApqEblgr9e387fP13Ci5ySfvWwO965fRIZ60UGj76SInDe3d4j7XqjjxX2tVLrSee6ey1gzV2PRwaagFpFpGx338Yt3jnL/KwcZHvfxV9dWseGqSpIT4p0ubUZSUIvItLzT0MX3NtZS7+7jyioX9924lIoCLe4fSgpqEQnI8e5BfvjSAV7c18rs3FQeuvMSrl1ShH//awklBbWInFPf0Cj/trmBh986QrwxfOvDC/jyVfNISdQwR7goqEXkjEbGfDy9/Rg/fu0Q3QMj/MWqUu69bhHF2VrpLtwU1CLyHj6f5Xc1rfxoUz1NXYNcVpnPd65fxMWzc5wuLWYpqEUEAGstmw928KNN9dS2eFlYlMljn1vN1QtdGod2mIJaRHi7oZP7XznIu009lOWlcv9ty7lxean2LowQCmqRGLa1sYsHXj3I1sZuirNS+P7Ny7ituoykBK1wF0kU1CIxxlrLlsNd/MsfDrH9SDeuzGS+9/El3L6mXDM5IpSCWiRG+HyWV/e7eXBzA7uP91KclaKAjhIKapEZbnTcxwt7WvjpHxs46O6nLC+Vf7h5GX9ZPVu3fEcJBbXIDNU/PMYz24/xyFtHaPUMUVWUwQO3reCGi0u0y0qUUVCLzDAnegZ54u0mntl+nL7hMS6dm8cPblnG1VWFxGkWR1RSUIvMANZadh7t4bEtTbxc2wbA9ReV8KUPzGV5WY6zxckFU1CLRLGh0XFe2NPCE+80UdPsJSslgS9+YC53XV5BaU6q0+VJkCioRaLQsa5Bntp2lGd3HKd3cJQFhRn84JZl3LKylLQk/VrPNPpERaLE2LiP1w6089S2Y7x5qIM4Y/jIkiLuXDuHy+bl6zbvGWzKoDbGPArcALRba5eFviQROd3x7kGe23Gc53Ycx+0dpigrma+vW8Cn1pRRkq3hjVgQSI/6ceBfgSdDW4qITBoaHeeVOjfP7TjOW4c7AbiqysX3bypn3aJCTa+LMVMGtbX2DWNMRRhqEYlp1lpqmr38cudxnt/dgufkKKU5qXzzmgXcWl3GLF0cjFlBG6M2xmwANgCUl5cH621FZjy3d4jndzXzqz+d4KC7n6SEONYvLebW6jIun5evuc8SvKC21j4EPARQXV1tg/W+IjPRwPAYm2rb+PWuZrYc7sRnYVV5Dj+4ZRk3XDSL7LREp0uUCKJZHyJhMjLm481DHTy/u4VX6toYGvVRlpfK1z40n5tXllLpynC6RIlQCmqREBr3WbY2dvHCnhZeqmnDc3KU3LREPnnJbG5eUcolc3I1rU6mFMj0vKeBq4ECY8wJ4O+ttY+EujCRaDXus7zb1M2Le1t5qaaVzv4R0pPi+cjSYj6+vIQPLnCRqFkbMg2BzPr4VDgKEYlmY+M+tjd189K+Nl6ubaOjb5iUxDiuWVTExy4uYd2iQq35LOdNQx8i52l4bJy3D3fxck0br+x30z0wQmpiPOsWFbJ+WTHXLC7U7dwSFPopEpmGvqFRNtd3sKm2jc31HfQPj5GZnMC6xYVct7SY
qxe6FM4SdPqJEplCq+ckr9a5+X2dm62NXYyOW/LTk7jh4hKuW1rM5fPztVOKhJSCWuR9fD7LvmYPr+138+r+dupavQDMLUjn81fM5dolRawqzyVeN6JImCioRQDv0ChvHerkDwfa2VzfQWf/MHEGqufk8Z2PLuKaxUXML9Q8Z3GGglpikrWWA219bK7vYHN9OzuP9jDms2SlJHDVwkLWLXJxdVUhuelJTpcqoqCW2NEzMMJbhzt542AHbxzqwO0dBmBxSRZ3X1nJ1VUuLpmTq5XpJOIoqGXGGhnzsetYD28e6uTNw53sPdGLtZCVksAHF7i4qsrFlVUuirNTnC5V5JwU1DJjWGs51N7Pm4c62XK4k22NXQyMjBNnYHlZDt9Yt4Arq1wsn52tXrNEFQW1RLXj3YO809DFloZO3m7ooqPPP5wxtyCdW1aV8oH5Li6bl092qlajk+iloJao0uo5ydbGLt5p6OLthi5O9JwEwJWZzOXz8rl8Xj5XzC9gdm6aw5WKBI+CWiJac+9JtjV2sa2xm21HumjqGgQgOzWRtZV53P3BSi6bl8+CwgytQiczloJaIoa1lqNdg2w/0s22I91sb+rieLe/x5yVksCauXncMbHj9uLiLO18IjFDQS2OGfdZDrR5efdIN+829bC9qfvUGHNeehJrKvL4/OVzubQyj0XFWboTUGKWglrCZmB4jN3He9nR1MOOo93sOtZL//AYALOyU7h8Xj5r5uaxpiKP+RrKEDlFQS0hYa3lRM9J/nSshz8d7WHnsR72t/Yx7rMYAwuLMrl55SxWV+RRXZFHqXbYFjkrBbUExcmRcfY1e9h1rMcfzsd6Tw1jpCXFs6Ish69ePY9L5uSysjxX0+VEpkFBLdPm81mOdA2w+1gvu473sPt4Lwda+xjz+TefL89L44p5+adCeVFxpm4wEbkACmqZUrt3iD0nPOw53sueE73sOd6Ld8g/tpyRnMDysmzuuaqSlWW5rCzPIT8j2eGKRWYWBbW8h2dwlH3NHvac6GXviV72nvDQ6hkCID7OUFWUyccunsXKshyWl+UwvzBDszFEQkxBHcM8J0epbfawr9nD3mYPNc0ejk7cUAJQkZ/G6oo8Lp6dzYqyHJbOyiY1STuZiISbgjpGdA+MUNviobbFy74zhPLs3FQuKs3m9tXlXDw7m2WzsslO0wU/kUigoJ5hrLW0eoaoa/FS2+KlpsVDXYuX5t6Tp44pzfGH8q3VZSwrzeai0mzytEC+SMRSUEexsXEfjZ0D1LV4qWv1ToSzh57BUQCM8a8id8mcXD572RyWlWazdFYWOWkKZZFooqCOEp7BUepaveyffLR5OejuZ2TMB0BSQhxVRRl8ZEkxS0uzWFKSxeKSLNKT9RGLRDv9FkeY0XEfjR0DHGjzcqCtjwOt/j8nZ14A5KcnsWRWFp+7vILFJZksKcmm0pVOouYqi8xICmqHWGtp8QxRPxHIB9v6ONDWR0NHP6Pj/htHEuMN81wZrK3MZ2FxJotLslhckokrI1nrYIjEEAV1iFlr6ewf4ZC7j4PuPurd/Rx0+4O5b2JBIoCS7BQWFWdy9cJCFhVnsqgkk8qCDJIS1EsWiXUK6iCx1tI1MMIhdz+H2v2h7H/eT/fAyKnjctISJxYkKmVhcSYLizOpKszUVDgROSsF9TRZa3F7hznc3s/h9j4OtfvD+JC779RsC4DMlASqijK5bmkRCwozqSrKpKooA1emhi1EZHoU1GcxNu7jWPcgDR0DHG7vp6Gj3/9ne/97hiyyUxNZUJjBdUuLWTARxgsKMynKUiCLSHDEfFB7h0Zp7BigscMfxg3tAzR09NPUNXDqoh5AYWYy81wZ3LKqlPmFGcx3ZTC/KEMX9kQk5GIiqMfGfTT3nqSxwx/CjZ2TwTxwas1k8C86NCcvjUpXBtcsLmKeK515hRnMc2Vo/WQRccyMCWprLR39wxzpGOBIp/8xGcjHugff0zvOSUuksiCdq6pcVLrS
mefKYJ4rnfK8dM2yEJGIE1BQG2PWAz8G4oGHrbU/DGlVZ2GtpWdwlCOdAzR1DtDU9edQbuocYGBk/NSxSfFxzMlPY35hBtcuKabSlU5lQTqVrgytayEiUWXKoDbGxAM/Aa4FTgDvGmM2WmvrQlGQtZbugRGaugY52jVAU9cgTZ0DHJ0I5ckF6wHiDMzOTWNuQTqrK/KoyPcPW8wtSGdWTqrWSRaRGSGQHvUa4LC1thHAGPMMcBMQ1KAe91lueXALRzoG3jOrIs5AaW4qc/LSuXHFLCry05lbkE5FQTpluWkaqhCRGS+QoC4Fjp/29Qng0vcfZIzZAGwAKC8vn3Yh8XH+26VXlecyJz9t4qEwFhEJ2sVEa+1DwEMA1dXVdorDz+j+21YEqxwRkRkjkK5qM1B22tezJ/5ORETCIJCgfhdYYIyZa4xJAm4HNoa2LBERmTTl0Ie1dswY8zVgE/7peY9aa2tDXpmIiAABjlFba38H/C7EtYiIyBloOoWISIRTUIuIRDgFtYhIhFNQi4hEOGPted2bcu43NaYDOHqe/3kB0BnEcqJBLLYZYrPdsdhmiM12T7fNc6y1rjO9EJKgvhDGmB3W2mqn6winWGwzxGa7Y7HNEJvtDmabNfQhIhLhFNQiIhEuEoP6IacLcEAsthlis92x2GaIzXYHrc0RN0YtIiLvFYk9ahEROY2CWkQkwjkW1MaY9caYemPMYWPM/zrD68nGmGcnXt9mjKlwoMygCqDN/90YU2eM2WuMec0YM8eJOoNtqnafdtwnjDHWGBP107gCabMx5taJz7vWGPMf4a4x2AL4+S43xrxujNk18TN+vRN1BpMx5lFjTLsxpuYsrxtjzL9MfE/2GmNWndeJrLVhf+BfLrUBqASSgD3Akvcd81XgpxPPbweedaLWMLf5Q0DaxPOvRHubA233xHGZwBvAVqDa6brD8FkvAHYBuRNfFzpddxja/BDwlYnnS4Amp+sOQruvBFYBNWd5/XrgJcAAa4Ft53Mep3rUpzbMtdaOAJMb5p7uJuCJiee/BK4xxkTztuJTttla+7q1dnDiy634d9OJdoF81gDfB/4PMBTO4kIkkDbfDfzEWtsDYK1tD3ONwRZImy2QNfE8G2gJY30hYa19A+g+xyE3AU9av61AjjGmZLrncSqoz7RhbunZjrHWjgEeID8s1YVGIG0+3Rfx/5842k3Z7ol/DpZZa18MZ2EhFMhnXQVUGWO2GGO2GmPWh6260Aikzd8D7jDGnMC/vv3Xw1Oao6b7e39GQdvcVoLHGHMHUA1c5XQtoWaMiQP+Gficw6WEWwL+4Y+r8f/L6Q1jzEXW2l4niwqxTwGPW2v/yRhzGfALY8wya63P6cIinVM96kA2zD11jDEmAf8/lbrCUl1oBLRJsDHmw8B3gRuttcNhqi2Upmp3JrAM2GyMacI/jrcxyi8oBvJZnwA2WmtHrbVHgIP4gztaBdLmLwLPAVhr3wFS8C9cNJMFZXNwp4I6kA1zNwJ3TTz/JPAHOzE6H6WmbLMxZiXwM/whHe1jlpPO2W5rrcdaW2CtrbDWVuAfm7/RWrvDmXKDIpCf7+fx96YxxhTgHwppDGONwRZIm48B1wAYYxbjD+qOsFYZfhuBz07M/lgLeKy1rdN+Fwevll6PvxfRAHx34u/uw/9LCv4P8T+Bw8B2oNLpK7xhaPOrgBvYPfHY6HTN4Wj3+47dTJTP+gjwszb4h3zqgH3A7U7XHIY2LwG24J8Rshv4iNM1B6HNTwOtwCj+fyV9Efgy8OXTPuefTHxP9p3vz7ZuIRcRiXC6M1FEJMIpqEVEIpyCWkQkwimoRUQinIJaRCTCKahFRCKcglpEJML9P9lJZhElu/llAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "x=np.arange(0,1,0.01)\n",
    "import matplotlib.pyplot as plt\n",
    "plt.plot(x,-np.log(1-x))\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-19T02:56:51.087764200Z",
     "start_time": "2024-04-19T02:56:50.966836800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 699 entries, 0 to 698\n",
      "Data columns (total 11 columns):\n",
      " #   Column                       Non-Null Count  Dtype \n",
      "---  ------                       --------------  ----- \n",
      " 0   Sample code number           699 non-null    int64 \n",
      " 1   Clump Thickness              699 non-null    int64 \n",
      " 2   Uniformity of Cell Size      699 non-null    int64 \n",
      " 3   Uniformity of Cell Shape     699 non-null    int64 \n",
      " 4   Marginal Adhesion            699 non-null    int64 \n",
      " 5   Single Epithelial Cell Size  699 non-null    int64 \n",
      " 6   Bare Nuclei                  699 non-null    object\n",
      " 7   Bland Chromatin              699 non-null    int64 \n",
      " 8   Normal Nucleoli              699 non-null    int64 \n",
      " 9   Mitoses                      699 non-null    int64 \n",
      " 10  Class                        699 non-null    int64 \n",
      "dtypes: int64(10), object(1)\n",
      "memory usage: 60.2+ KB\n",
      "None\n",
      "        Sample code number  Clump Thickness  Uniformity of Cell Size  \\\n",
      "count         6.990000e+02       699.000000               699.000000   \n",
      "unique                 NaN              NaN                      NaN   \n",
      "top                    NaN              NaN                      NaN   \n",
      "freq                   NaN              NaN                      NaN   \n",
      "mean          1.071704e+06         4.417740                 3.134478   \n",
      "std           6.170957e+05         2.815741                 3.051459   \n",
      "min           6.163400e+04         1.000000                 1.000000   \n",
      "25%           8.706885e+05         2.000000                 1.000000   \n",
      "50%           1.171710e+06         4.000000                 1.000000   \n",
      "75%           1.238298e+06         6.000000                 5.000000   \n",
      "max           1.345435e+07        10.000000                10.000000   \n",
      "\n",
      "        Uniformity of Cell Shape  Marginal Adhesion  \\\n",
      "count                 699.000000         699.000000   \n",
      "unique                       NaN                NaN   \n",
      "top                          NaN                NaN   \n",
      "freq                         NaN                NaN   \n",
      "mean                    3.207439           2.806867   \n",
      "std                     2.971913           2.855379   \n",
      "min                     1.000000           1.000000   \n",
      "25%                     1.000000           1.000000   \n",
      "50%                     1.000000           1.000000   \n",
      "75%                     5.000000           4.000000   \n",
      "max                    10.000000          10.000000   \n",
      "\n",
      "        Single Epithelial Cell Size Bare Nuclei  Bland Chromatin  \\\n",
      "count                    699.000000         699       699.000000   \n",
      "unique                          NaN          11              NaN   \n",
      "top                             NaN           1              NaN   \n",
      "freq                            NaN         402              NaN   \n",
      "mean                       3.216023         NaN         3.437768   \n",
      "std                        2.214300         NaN         2.438364   \n",
      "min                        1.000000         NaN         1.000000   \n",
      "25%                        2.000000         NaN         2.000000   \n",
      "50%                        2.000000         NaN         3.000000   \n",
      "75%                        4.000000         NaN         5.000000   \n",
      "max                       10.000000         NaN        10.000000   \n",
      "\n",
      "        Normal Nucleoli     Mitoses       Class  \n",
      "count        699.000000  699.000000  699.000000  \n",
      "unique              NaN         NaN         NaN  \n",
      "top                 NaN         NaN         NaN  \n",
      "freq                NaN         NaN         NaN  \n",
      "mean           2.866953    1.589413    2.689557  \n",
      "std            3.053634    1.715078    0.951273  \n",
      "min            1.000000    1.000000    2.000000  \n",
      "25%            1.000000    1.000000    2.000000  \n",
      "50%            1.000000    1.000000    2.000000  \n",
      "75%            4.000000    1.000000    4.000000  \n",
      "max           10.000000   10.000000    4.000000  \n"
     ]
    }
   ],
   "source": [
    "\"\"\"\n",
    "逻辑回归做二分类进行癌症预测（根据细胞的属性特征）\n",
    ":return: NOne\n",
    "\"\"\"\n",
    "# 构造列标签名字\n",
    "column = ['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape',\n",
    "          'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli',\n",
    "          'Mitoses', 'Class']\n",
    "\n",
    "# 读取数据\n",
    "# data = pd.read_csv(\n",
    "#     \"https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data\",\n",
    "#     names=column)\n",
    "data = pd.read_csv(\n",
    "    \"./data/breast-cancer-wisconsin.csv\",\n",
    "    names=column)\n",
    "# print(data) #想看可以解除\n",
    "#当你读取数据时，看上去是数值的列，读进来是字符串，说明里边\n",
    "# 存在了非数值情况\n",
    "print(data.info())\n",
    "print(data.describe(include='all'))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T01:55:03.093548600Z",
     "start_time": "2024-07-15T01:55:03.041054700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "array(['1', '10', '2', '4', '3', '9', '7', '?', '5', '8', '6'],\n      dtype=object)"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['Bare Nuclei'].unique() #因为上面发现它是object类型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T01:56:49.920250800Z",
     "start_time": "2024-07-15T01:56:49.907683200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "(683, 11)\n"
     ]
    }
   ],
   "source": [
    "# 缺失值进行处理\n",
    "data = data.replace(to_replace='?', value=np.nan)\n",
    "#直接删除，哪一行有空值，就删除对应的样本\n",
    "data = data.dropna()\n",
    "print('-' * 50)\n",
    "print(data.shape)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T01:57:03.926771200Z",
     "start_time": "2024-07-15T01:57:03.922092200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 683 entries, 0 to 698\n",
      "Data columns (total 11 columns):\n",
      " #   Column                       Non-Null Count  Dtype \n",
      "---  ------                       --------------  ----- \n",
      " 0   Sample code number           683 non-null    int64 \n",
      " 1   Clump Thickness              683 non-null    int64 \n",
      " 2   Uniformity of Cell Size      683 non-null    int64 \n",
      " 3   Uniformity of Cell Shape     683 non-null    int64 \n",
      " 4   Marginal Adhesion            683 non-null    int64 \n",
      " 5   Single Epithelial Cell Size  683 non-null    int64 \n",
      " 6   Bare Nuclei                  683 non-null    object\n",
      " 7   Bland Chromatin              683 non-null    int64 \n",
      " 8   Normal Nucleoli              683 non-null    int64 \n",
      " 9   Mitoses                      683 non-null    int64 \n",
      " 10  Class                        683 non-null    int64 \n",
      "dtypes: int64(10), object(1)\n",
      "memory usage: 64.0+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T01:57:09.245343900Z",
     "start_time": "2024-07-15T01:57:09.235642Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "data": {
      "text/plain": "'Class'"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "column[10]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-19T03:30:01.069265900Z",
     "start_time": "2024-04-19T03:30:01.046281900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "data": {
      "text/plain": "array([2, 4], dtype=int64)"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#分类的类别是2和4,对应 \"良性\", \"恶性\"\n",
    "data[column[10]].unique()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T01:57:17.227352500Z",
     "start_time": "2024-07-15T01:57:17.216105200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 683 entries, 0 to 698\n",
      "Data columns (total 11 columns):\n",
      " #   Column                       Non-Null Count  Dtype \n",
      "---  ------                       --------------  ----- \n",
      " 0   Sample code number           683 non-null    int64 \n",
      " 1   Clump Thickness              683 non-null    int64 \n",
      " 2   Uniformity of Cell Size      683 non-null    int64 \n",
      " 3   Uniformity of Cell Shape     683 non-null    int64 \n",
      " 4   Marginal Adhesion            683 non-null    int64 \n",
      " 5   Single Epithelial Cell Size  683 non-null    int64 \n",
      " 6   Bare Nuclei                  683 non-null    object\n",
      " 7   Bland Chromatin              683 non-null    int64 \n",
      " 8   Normal Nucleoli              683 non-null    int64 \n",
      " 9   Mitoses                      683 non-null    int64 \n",
      " 10  Class                        683 non-null    int64 \n",
      "dtypes: int64(10), object(1)\n",
      "memory usage: 64.0+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-19T03:31:25.684610800Z",
     "start_time": "2024-04-19T03:31:25.625645200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "data": {
      "text/plain": "'Bare Nuclei'"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "column[6]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T01:59:15.570276500Z",
     "start_time": "2024-07-15T01:59:15.558242200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "#把第6列的字符串转化为数字类型\n",
    "data[column[6]] = data[column[6]].astype('int16')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:00:19.888433900Z",
     "start_time": "2024-07-15T02:00:19.878877600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 683 entries, 0 to 698\n",
      "Data columns (total 11 columns):\n",
      " #   Column                       Non-Null Count  Dtype\n",
      "---  ------                       --------------  -----\n",
      " 0   Sample code number           683 non-null    int64\n",
      " 1   Clump Thickness              683 non-null    int64\n",
      " 2   Uniformity of Cell Size      683 non-null    int64\n",
      " 3   Uniformity of Cell Shape     683 non-null    int64\n",
      " 4   Marginal Adhesion            683 non-null    int64\n",
      " 5   Single Epithelial Cell Size  683 non-null    int64\n",
      " 6   Bare Nuclei                  683 non-null    int16\n",
      " 7   Bland Chromatin              683 non-null    int64\n",
      " 8   Normal Nucleoli              683 non-null    int64\n",
      " 9   Mitoses                      683 non-null    int64\n",
      " 10  Class                        683 non-null    int64\n",
      "dtypes: int16(1), int64(10)\n",
      "memory usage: 60.0 KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:00:22.598735Z",
     "start_time": "2024-07-15T02:00:22.588740300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "data": {
      "text/plain": "array([-1.21629973, -0.70863282, -0.75174943,  0.04301674, -0.55657068,\n       -0.71054972, -0.99312055, -0.62911518, -0.36280962])"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 进行数据的分割,第零列是编号，不可以作为特征，把第1-9列作为特征，第10列作为标签\n",
    "x_train, x_test, y_train, y_test = train_test_split(data[column[1:10]], data[column[10]], test_size=0.25,\n",
    "                                                    random_state=1)\n",
    "\n",
    "# 进行标准化处理\n",
    "std = StandardScaler()\n",
    "\n",
    "x_train = std.fit_transform(x_train) #训练集标准化\n",
    "x_test = std.transform(x_test) #测试集标准化\n",
    "x_train[0]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T02:02:11.874025300Z",
     "start_time": "2024-07-15T02:02:11.857095200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1.11400191 0.25293086 0.78938469 0.60986034 0.0728013  1.10834397\n",
      "  0.7794668  0.64312128 0.67692658]]\n",
      "[2 2 2 4 2 4 2 2 4 4 2 2 4 2 2 4 2 2 2 2 2 2 2 4 4 2 2 2 4 4 2 2 4 4 2 4 2\n",
      " 2 4 4 4 2 2 4 2 2 2 2 4 2 2 2 4 2 2 2 4 2 2 2 2 4 2 2 2 4 2 4 4 2 2 4 2 2\n",
      " 4 2 2 2 2 2 2 2 4 2 4 4 2 2 2 4 2 2 4 2 2 4 4 2 2 4 2 2 4 4 2 2 2 2 4 2 4\n",
      " 4 2 4 2 4 2 2 2 2 4 2 4 2 2 2 2 2 4 2 2 2 2 2 2 2 4 2 4 4 2 2 4 2 2 2 2 4\n",
      " 2 2 2 2 2 4 2 4 2 4 2 2 4 2 4 2 4 4 2 4 2 2 2]\n",
      "准确率： 0.9824561403508771\n",
      "444    2\n",
      "24     2\n",
      "195    2\n",
      "49     4\n",
      "375    2\n",
      "Name: Class, dtype: int64\n",
      "--------------------------------------------------\n",
      "[[0.94893919 0.05106081]\n",
      " [0.99494175 0.00505825]\n",
      " [0.98365149 0.01634851]\n",
      " [0.02707911 0.97292089]\n",
      " [0.99732446 0.00267554]]\n"
     ]
    }
   ],
   "source": [
    "#\n",
    "# # 逻辑回归预测\n",
    "# C正则化力度\n",
    "# solver = 'liblinear'  solver是学习率优化算法，就是学习率会随着epoch的变化而变化\n",
    "#epoch就代表第几次迭代\n",
    "#max_iter 最大迭代次数\n",
    "lg = LogisticRegression(C=0.5, solver='lbfgs')\n",
    "#\n",
    "lg.fit(x_train, y_train)\n",
    "# 逻辑回归的权重参数，了解，没那么重要\n",
    "print(lg.coef_)\n",
    "\n",
    "y_predict = lg.predict(x_test)\n",
    "print(y_predict) #预测的标签\n",
    "print(\"准确率：\", lg.score(x_test, y_test))\n",
    "print(y_test[0:5])\n",
    "print('-'*50)\n",
    "print(lg.predict_proba(x_test)[0:5])  #得出对应分类的概率\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-15T02:04:51.879445300Z",
     "start_time": "2024-07-15T02:04:51.841605500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "          良性       0.97      1.00      0.99       111\n",
      "          恶性       1.00      0.95      0.97        60\n",
      "\n",
      "    accuracy                           0.98       171\n",
      "   macro avg       0.99      0.97      0.98       171\n",
      "weighted avg       0.98      0.98      0.98       171\n",
      "\n",
      "AUC指标： 0.975\n"
     ]
    }
   ],
   "source": [
    "# 为什么还要看下召回率，labels和target_names对应\n",
    "# macro avg 平均值  weighted avg 加权平均值\n",
    "print(classification_report(y_test, y_predict, labels=[2, 4], target_names=[\"良性\", \"恶性\"]))\n",
    "#AUC计算要求是二分类，不需要是0和1\n",
    "print(\"AUC指标：\", roc_auc_score(y_test, y_predict))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-15T02:11:09.297190100Z",
     "start_time": "2024-07-15T02:11:09.260366800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(512, 9)\n",
      "(171, 9)\n"
     ]
    }
   ],
   "source": [
    "print(x_train.shape)\n",
    "print(x_test.shape)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.3025850929940455\n"
     ]
    }
   ],
   "source": [
    "print(-np.log(0.1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "data": {
      "text/plain": "0.10536051565782628"
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "-np.log(0.9)\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "data": {
      "text/plain": "3.55"
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x=-3/10\n",
    "y=5*x**2+3*x+4\n",
    "y"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
